# Long-format metrics for the runs where funding is NOT based on data history,
# so that the effect of `share.data.` can be looked at in isolation.
# Result columns: .run.number., .step., share.data., name, value.
pdata_sharing <- df %>%
  filter(!fund.on.data.history.) %>%
  select(
    .run.number., .step., share.data.,
    mean.grants.groups:sum..total.primary.publications..of.groups
  ) %>%
  # Every selected metric column becomes one (name, value) row.
  pivot_longer(cols = -c(.run.number., .step., share.data.)) %>%
  # Discard rows that contain a missing value in any column.
  drop_na()
# Smoothed trajectory of every metric whose name contains "gini",
# coloured by whether data is shared; one facet per metric.
ggplot(
  data = filter(pdata_sharing, str_detect(name, "gini")),
  mapping = aes(x = .step., y = value, colour = share.data.)
) +
  geom_smooth() +
  facet_wrap(facets = vars(name), nrow = 2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Inequality is lower when sharing data.
# Same gini metrics as raw per-run trajectories: `group = .run.number.`
# draws one line per run instead of a smoothed summary.
p <- ggplot(
  data = filter(pdata_sharing, str_detect(name, "gini")),
  mapping = aes(
    x = .step., y = value,
    colour = share.data., group = .run.number.
  )
) +
  geom_line() +
  facet_wrap(facets = vars(name), nrow = 2)

# Render the ggplot object as an interactive plotly widget.
plotly::ggplotly(p)
# data sharing with funding reward
# Long-format metrics for the runs where data IS shared, so that the effect
# of `fund.on.data.history.` can be compared within those runs.
# Result columns: .run.number., .step., fund.on.data.history., name, value.
data_funding <- df %>%
  filter(share.data.) %>%
  select(
    .run.number., .step., fund.on.data.history.,
    mean.grants.groups:sum..total.primary.publications..of.groups
  ) %>%
  # Every selected metric column becomes one (name, value) row.
  pivot_longer(cols = -c(.run.number., .step., fund.on.data.history.)) %>%
  # Discard rows that contain a missing value in any column.
  drop_na()
# Smoothed trajectory of every metric whose name contains "gini",
# coloured by whether funding depends on data history; one facet per metric.
ggplot(
  data = filter(data_funding, str_detect(name, "gini")),
  mapping = aes(x = .step., y = value, colour = fund.on.data.history.)
) +
  geom_smooth() +
  facet_wrap(facets = vars(name), nrow = 2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# inequality is higher when sharing data
# Same gini metrics as raw per-run trajectories: `group = .run.number.`
# draws one line per run instead of a smoothed summary.
p <- ggplot(
  data = filter(data_funding, str_detect(name, "gini")),
  mapping = aes(
    x = .step., y = value,
    colour = fund.on.data.history., group = .run.number.
  )
) +
  geom_line() +
  facet_wrap(facets = vars(name), nrow = 2)

# Render the ggplot object as an interactive plotly widget.
plotly::ggplotly(p)
# compare three
# Long-format metrics for all runs, keeping both experimental switches
# (`fund.on.data.history.` and `share.data.`) as identifier columns.
# Note that the metric range here runs up to `sum..total.datasets..of.groups`.
comparison <- df %>%
  select(
    .run.number., .step., fund.on.data.history., share.data.,
    mean.grants.groups:sum..total.datasets..of.groups
  ) %>%
  # Every selected metric column becomes one (name, value) row.
  pivot_longer(
    cols = -c(.run.number., .step., fund.on.data.history., share.data.)
  ) %>%
  # Discard rows that contain a missing value in any column.
  drop_na()
# Collapse the two logical switches into a single `experiment` label.
# Only three of the four combinations get a label; rows with
# fund.on.data.history. == TRUE but share.data. == FALSE (and rows where a
# switch is NA) fall through to NA and are removed by the final drop_na().
# The result overwrites `comparison`; leading `<-` is used so the
# reassignment is visible at the start of the pipeline, not hidden at the end.
comparison <- comparison %>%
  mutate(experiment = case_when(
    !share.data. & !fund.on.data.history. ~ "no sharing",
    share.data. & !fund.on.data.history. ~ "only sharing",
    share.data. & fund.on.data.history. ~ "share and reward",
    TRUE ~ NA_character_
  )) %>%
  # The switches are now fully encoded in `experiment`.
  select(-share.data., -fund.on.data.history.) %>%
  drop_na()
# Smoothed trajectory of every metric whose name contains "gini",
# coloured by experimental condition; one facet per metric.
ggplot(
  data = filter(comparison, str_detect(name, "gini")),
  mapping = aes(x = .step., y = value, colour = experiment)
) +
  geom_smooth() +
  facet_wrap(facets = vars(name), nrow = 2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Almost no difference. Possible explanation: publications and datasets are closely linked (runs with more publications also have more datasets). Those that are successful are also those that share (because one funder demands it). Chance/error also still has a large influence. We could play with the number of funders and the share of funders that mandate data sharing.
Next thing to look at: vary the rate of data vs. publication history, and display the fraction of data grants and normal grants.
Does funding based on history lead to higher concentration? I.e., do groups that got grants with data sharing tend to get more grants of the same sort?